##################################################
# Replication Code
# Taeyong Park and Andrew Reeves
# "Local Unemployment and Voting for President: Uncovering Causal Mechanisms"
# Summary: Data Setup for 2008 Analysis
##################################################


rm(list = ls())
library(foreign)
library(stringr)


#########################
#
# I. CREATE MERGED DATA #
#
#########################

#######################
# 1. Import Data Sets #
#######################
# CCES 2008 data
data=get(load("cces_2008_common.RData")) 
# Unemp data 
unempDataLAUS = read.csv("LAUS_CountyData_Unemp0807.csv", stringsAsFactors = F) # Raw Data from the Bureau of Labor Statistics website
# Gas and foreclosures data from www.GasBuddy.com and www.realtytrac.com
econData=read.csv("Combined Economic Data File.csv") 

# foreclosures per 1000 households
econData$forcFeb08 = econData$TotFeb08/econData$HH_2008*1000
econData$forcMar08 = econData$TotMar08/econData$HH_2008*1000
econData$forcApr08 = econData$TotApr08/econData$HH_2008*1000
econData$forcMay08 = econData$TotMay08/econData$HH_2008*1000
econData$forcJune08 = econData$TotJune08/econData$HH_2008*1000
econData$forcJuly08 = econData$TotJuly08/econData$HH_2008*1000
econData$forcAug08 = econData$TotAug08/econData$HH_2008*1000
econData$forcSep08 = econData$TotSep08/econData$HH_2008*1000
econData$forcOct08 = econData$TotOct08/econData$HH_2008*1000

econData$forcMar07 = econData$TotMar07/econData$HH_2008*1000
econData$forcApr07 = econData$TotApr07/econData$HH_2008*1000
econData$forcMay07 = econData$TotMay07/econData$HH_2008*1000
econData$forcJune07 = econData$TotJune07/econData$HH_2008*1000
econData$forcAug07 = econData$TotAug07/econData$HH_2008*1000
econData$forcSep07 = econData$TotSep07/econData$HH_2008*1000
econData$forcOct07 = econData$TotOct07/econData$HH_2008*1000

econDataReduced=econData[, c("AprilGas", "MayGas", "JuneGas", "JulyGas",
                              "AugGas", "SeptGas", "OctGas", "NovGas",
                              "forcFeb08", "forcMar08", "forcApr08",
                              "forcMay08", "forcJune08", "forcJuly08",
                              "forcAug08", "forcSep08", "forcOct08",
                              "forcMar07", "forcApr07", "forcMay07",
                              "forcJune07", "forcAug07", "forcSep07",
                              "forcOct07", "fips", "County")]
colnames(econDataReduced)=c("gasApr08", "gasMay08", "gasJune08", "gasJuly08",
                             "gasAug08", "gasSept08", "gasOct08", "gasNov08",
                             "forcFeb08", "forcMar08", "forcApr08",
                             "forcMay08", "forcJune08", "forcJuly08",
                             "forcAug08", "forcSep08", "forcOct08",
                             "forcMar07", "forcApr07", "forcMay07",
                             "forcJune07", "forcAug07", "forcSep07",
                             "forcOct07", "fips", "county")



#########################
# 2. Merge by fips code #
#########################

## Creat fips code variable for cces
oneDigit=which(str_detect(data$V270, "^\\d{1}$")) # detect one digit
threeDigits=paste("00", data$V270[oneDigit], sep="")
data$V270[oneDigit]=threeDigits

twoDigits=which(str_detect(data$V270, "^\\d{2}$")) # detect two digits
threeDigits=paste("0", data$V270[twoDigits], sep="")
data$V270[twoDigits]=threeDigits

data$fips = paste(data$V265, data$V270, sep="")

## Unemp data ##
fourDigits=which(str_detect(unempDataLAUS$fips, "^\\d{4}$")) # detect four digits
fiveDigits=paste("0", unempDataLAUS$fips[fourDigits], sep="")
unempDataLAUS$fips[fourDigits]=fiveDigits

data$unempJan08 = rep(NA, nrow(data))
data$unempFeb08 = rep(NA, nrow(data))
data$unempMar08 = rep(NA, nrow(data))
data$unempApr08 = rep(NA, nrow(data))
data$unempMay08 = rep(NA, nrow(data))
data$unempJune08 = rep(NA, nrow(data))
data$unempJuly08 = rep(NA, nrow(data))
data$unempAug08 = rep(NA, nrow(data))
data$unempSep08 = rep(NA, nrow(data))
data$unempOct08 = rep(NA, nrow(data))
data$unempNov08 = rep(NA, nrow(data))
data$unempDec08 = rep(NA, nrow(data))
data$unempJan07 = rep(NA, nrow(data))
data$unempFeb07 = rep(NA, nrow(data))
data$unempMar07 = rep(NA, nrow(data))
data$unempApr07 = rep(NA, nrow(data))
data$unempMay07 = rep(NA, nrow(data))
data$unempJune07 = rep(NA, nrow(data))
data$unempJuly07 = rep(NA, nrow(data))
data$unempAug07 = rep(NA, nrow(data))
data$unempSep07 = rep(NA, nrow(data))
data$unempOct07 = rep(NA, nrow(data))
data$unempNov07 = rep(NA, nrow(data))
data$unempDec07 = rep(NA, nrow(data))

for (i in 1:nrow(unempDataLAUS)){
  matched=which(unempDataLAUS$fips[i]==data$fips)
  data$unempJan08[matched] = unempDataLAUS[,2][i]
  data$unempFeb08[matched] = unempDataLAUS[,4][i]
  data$unempMar08[matched] = unempDataLAUS[,6][i]
  data$unempApr08[matched] = unempDataLAUS[,8][i]
  data$unempMay08[matched] = unempDataLAUS[,10][i]
  data$unempJune08[matched] = unempDataLAUS[,12][i]
  data$unempJuly08[matched] = unempDataLAUS[,14][i]
  data$unempAug08[matched] = unempDataLAUS[,16][i]
  data$unempSep08[matched] = unempDataLAUS[,18][i]
  data$unempOct08[matched] = unempDataLAUS[,20][i]
  data$unempNov08[matched] = unempDataLAUS[,22][i]
  data$unempDec08[matched] = unempDataLAUS[,24][i]
  data$unempJan07[matched] = unempDataLAUS[,3][i]
  data$unempFeb07[matched] = unempDataLAUS[,5][i]
  data$unempMar07[matched] = unempDataLAUS[,7][i]
  data$unempApr07[matched] = unempDataLAUS[,9][i]
  data$unempMay07[matched] = unempDataLAUS[,11][i]
  data$unempJune07[matched] = unempDataLAUS[,13][i]
  data$unempJuly07[matched] = unempDataLAUS[,15][i]
  data$unempAug07[matched] = unempDataLAUS[,17][i]
  data$unempSep07[matched] = unempDataLAUS[,19][i]
  data$unempOct07[matched] = unempDataLAUS[,21][i]
  data$unempNov07[matched] = unempDataLAUS[,23][i]
  data$unempDec07[matched] = unempDataLAUS[,25][i]
  if (i %% 100 == 0) print(i)
} 

## Merge econDataReduced
fourDigits=which(str_detect(econDataReduced$fips, "^\\d{4}$")) # detect four digits
fiveDigits=paste("0", econDataReduced$fips[fourDigits], sep="")
econDataReduced$fips[fourDigits]=fiveDigits

dataMerged=merge(data, econDataReduced, by="fips", all.x=T)

## Merge Income Data
incomeData = read.csv("IncomeData0807.csv", stringsAsFactors = F) # The raw data from https://www.census.gov/did/www/saipe/data/interactive/saipe.html?s_appName=saipe&map_yearSelector=2014&map_geoSelector=aa_c&s_year=2015,2014,2013,2012,2011,2010,2009,2008,2007,2006,2005,2004&menu=grid_proxy&s_measures=mhi_snc  (RECODED by the authors)

incomeData$fips = as.character(incomeData$fips)
fourDigits=which(str_detect(incomeData$fips, "^\\d{4}$")) # detect four digits
fiveDigits=paste("0", incomeData$fips[fourDigits], sep="")
incomeData$fips[fourDigits]=fiveDigits

dataMergedIncome = merge(dataMerged, incomeData, by = "fips", all.x=T)



write.csv(dataMergedIncome, "cces08Merged.csv", row.names = F)


###############################
#
# END - I. CREATE MERGED DATA #
#
###############################




###############################
#
# II. DATA SETUP & IMPUTATION #
#
###############################

rm(list = ls())
library(foreign)
library(Amelia)
library(stringr)
data = read.csv("cces08Merged.csv", stringsAsFactors = F)


# countyID
J = length(unique(data$fips))
for (i in 1:J){
  data$countyID[data$fips==unique(data$fips)[i]] = i
}
table(data$countyID)

# stateID
K = length(unique(data$V259))
for (i in 1:K){
  data$stateID[data$V259==unique(data$V259)[i]] = i
}
table(data$stateID)


# Presidential vote
table(as.factor(data$CC410))
data$pvote = as.numeric(as.factor(data$CC410))
data$pvote[as.numeric(as.factor(data$CC410))>7] = NA
data$pvote[as.numeric(as.factor(data$CC410)) == 5] = 1
data$pvote[as.numeric(as.factor(data$CC410)) != 5] = 0
table(data$pvote) # 0 = Others; 1 = McCain --> Vote for Incumbent Party

data$pvote2 = as.numeric(as.factor(data$CC410))
data$pvote2[as.numeric(as.factor(data$CC410)) != 1 & as.numeric(as.factor(data$CC410)) != 5] = NA
data$pvote2[as.numeric(as.factor(data$CC410)) == 5] = 1
data$pvote2[as.numeric(as.factor(data$CC410)) == 1] = 0
table(data$pvote2) # 0 = Obama; 1 = McCain --> Two party 


# Evaluations of national economy
table(as.factor(data$CC302))
data$natecon = as.numeric(as.factor(data$CC302))
data$natecon[data$natecon==5] = NA
data$natecon5 = rep(NA, nrow(data))
data$natecon5[data$natecon==6]=3
data$natecon5[data$natecon==4]=4
data$natecon5[data$natecon==3]=5
data$natecon5[data$natecon==2]=1
data$natecon5[data$natecon==1]=2
table(data$natecon5)



## COVARIATES
# Gender
table(as.factor(data$V208))
data$female = as.numeric(as.factor(data$V208))
data$female[data$female == 2] = 0
data$female[data$female == 1] = 1
table(data$female)

# Age
table(data$V207)
data$age = 2008-data$V207
table(data$age)

# Race
table(as.factor(data$V211))
data$race = as.numeric(as.factor(data$V211))
data$race[data$race != 2 & data$race != 3] = 0
data$race[data$race == 2] = 1 # black
data$race[data$race == 3] = 2 # hispanic

# Employment status
table(as.factor(data$V209))
data$employment = as.numeric(as.factor(data$V209))
data$employment[data$employment != 1 & data$employment != 4 & data$employment != 8 & data$employment != 9] = 0
data$employment[data$employment == 4] = 2 # part-time
data$employment[data$employment == 8 | data$employment == 9] = 3 # unemployed
table(data$employment)

# Income
table(data$V246)
data$income = rep(NA, nrow(data))
data$income[as.numeric(as.factor(data$V246))==14]=1
data$income[as.numeric(as.factor(data$V246))==1]=2
data$income[as.numeric(as.factor(data$V246))==4]=3
data$income[as.numeric(as.factor(data$V246))==6]=4
data$income[as.numeric(as.factor(data$V246))==7]=5
data$income[as.numeric(as.factor(data$V246))==8]=6
data$income[as.numeric(as.factor(data$V246))==9]=7
data$income[as.numeric(as.factor(data$V246))==10]=8
data$income[as.numeric(as.factor(data$V246))==11]=9
data$income[as.numeric(as.factor(data$V246))==12]=10
data$income[as.numeric(as.factor(data$V246))==13]=11
data$income[as.numeric(as.factor(data$V246))==2]=12
data$income[as.numeric(as.factor(data$V246))==3]=13
data$income[as.numeric(as.factor(data$V246))==5]=14
table(data$income)

# Education
table(as.factor(data$V213))
data$educ = as.numeric(as.factor(data$V213))
data$educ[data$educ==5] = 0 # post-grad
data$educ[data$educ==1 | data$educ==6] = 1 # someCollege
table(data$educ) # 2 = fourCollege; 3=HighSchool; 4=noHigh

# Own/Rent
table(as.factor(data$CC333))
data$ownHome = rep(NA, nrow(data))
data$ownHome[as.numeric(as.factor(data$CC333))==3] = 1
data$ownHome[as.numeric(as.factor(data$CC333))!=3] = 0
table(data$ownHome)

# Party ID
table(as.factor(data$CC423))
data$party3 = as.numeric(as.factor(data$CC423))
data$party3[data$party3==3 | data$party3==4] = 0
data$party3[data$party3==5] = 3
table(data$party3) # 1=dem; 2=ind; 3=rep

# Ideology
table(as.factor(data$V243))
data$ideol = rep(NA, nrow(data))
data$ideol[as.numeric(as.factor(data$V243))==4] = NA
data$ideol[as.numeric(as.factor(data$V243))==5] = 1
data$ideol[as.numeric(as.factor(data$V243))==1] = 2
data$ideol[as.numeric(as.factor(data$V243))==3] = 3
data$ideol[as.numeric(as.factor(data$V243))==2] = 4
data$ideol[as.numeric(as.factor(data$V243))==6] = 5
table(data$ideol) # 1=very liberal ... 5=very conservative


# News interest
table(as.factor(data$V244))
data$newsInt = rep(NA, nrow(data))
data$newsInt[as.numeric(as.factor(data$V244))==1]=NA
data$newsInt[as.numeric(as.factor(data$V244))==2]=1
data$newsInt[as.numeric(as.factor(data$V244))==4]=2
data$newsInt[as.numeric(as.factor(data$V244))==5]=3
data$newsInt[as.numeric(as.factor(data$V244))==3]=4
table(data$newsInt)


### Create a data frame: df ###
df=data[, c("V100", "V201", "V259","stateID",
             "county", "countyID", 
             "pvote2", "natecon5",
             "unempJan07",  "unempFeb07",  "unempMar07",   
             "unempApr07", "unempMay07", "unempJune07",  
             "unempJuly07",  "unempAug07", "unempSep07",   
             "unempOct07", "unempNov07", "unempDec07", 
             "unempJan08",  "unempFeb08",  "unempMar08",   
             "unempApr08", "unempMay08", "unempJune08",  
             "unempJuly08",  "unempAug08", "unempSep08",   
             "unempOct08", "unempNov08", "unempDec08", 
             "gasApr08", "gasMay08", "gasJune08",   
             "gasJuly08", "gasAug08", "gasSept08", "gasOct08",    
             "gasNov08",
             "forcFeb08", "forcMar08", "forcApr08", "forcMay08", 
             "forcJune08", "forcJuly08", "forcAug08", "forcSep08", 
             "forcOct08", "forcMar07", "forcApr07", "forcMay07", 
             "forcJune07", "forcAug07", "forcSep07", "forcOct07",
             "inc08", "inc07",
             "age", "female", "race", "employment", "income",
             "educ", "ownHome", "newsInt", "ideol", "party3")]
colnames(df) = c("id", "weight", "state", colnames(df)[-c(1,2,3)])

## Multiple imputations ##
df=df[-(which(is.na(df$pvote2))),]

set.seed(123)
data08Imputed = amelia(df[,-c(3,5)], m=1, ords=c("pvote2", "natecon5", 
                                                  "age", "female", "race",
                                                  "employment", "income", "educ",
                                                  "ownHome", "newsInt", "ideol", "party3"))
write.amelia(data08Imputed, separate=T, "data08Imputed", format="dta")

#####################################
#
# END - II. DATA SETUP & IMPUTATION #
#
#####################################

